{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Crawler class"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import urllib.request\n",
    "import urllib.parse\n",
    "from bs4 import BeautifulSoup\n",
    "import sqlite3\n",
    "import re\n",
    "\n",
    "# Stop words that are never stored in the index (only the keys matter; every value is 1)\n",
    "ignorewords = dict.fromkeys(('the', 'of', 'to', 'and', 'a', 'in', 'is', 'it'), 1)\n",
    "\n",
    "class Crawler:\n",
    "    \"\"\"Crawl web pages and keep a word/link index of them in SQLite.\"\"\"\n",
    "\n",
    "    # Words are separated by runs of non-word characters. The pattern must\n",
    "    # not be able to match the empty string: since Python 3.7 re.split()\n",
    "    # also splits on empty matches, so '\\W*' would cut the text into\n",
    "    # single characters.\n",
    "    _word_splitter = re.compile(r'\\W+')\n",
    "\n",
    "    def __init__(self, db_name):\n",
    "        \"\"\"Open (or create) the SQLite database named ``db_name``.\"\"\"\n",
    "        self.con = sqlite3.connect(db_name, timeout=10)\n",
    "\n",
    "    def __del__(self):\n",
    "        # self.con is missing when sqlite3.connect() raised in __init__\n",
    "        con = getattr(self, 'con', None)\n",
    "        if con is not None:\n",
    "            con.close()\n",
    "\n",
    "    def db_commit(self):\n",
    "        \"\"\"Commit the current transaction.\"\"\"\n",
    "        self.con.commit()\n",
    "\n",
    "    def get_entry_id(self, table, field, value, create_new=True):\n",
    "        \"\"\"Return the rowid of the row of ``table`` whose ``field`` equals ``value``.\n",
    "\n",
    "        When no such row exists a new one is inserted and its rowid is\n",
    "        returned, unless ``create_new`` is false, in which case ``None`` is\n",
    "        returned and nothing is written.\n",
    "\n",
    "        ``table`` and ``field`` are pasted into the SQL text (identifiers\n",
    "        cannot be bound as parameters), so they must be trusted names.\n",
    "        ``value`` is bound as a parameter and may contain any characters.\n",
    "        \"\"\"\n",
    "        row = self.con.execute(\n",
    "            'select rowid from %s where %s=?' % (table, field), (value,)\n",
    "        ).fetchone()\n",
    "        if row is not None:\n",
    "            return row[0]\n",
    "        if not create_new:\n",
    "            return None\n",
    "        cur = self.con.execute(\n",
    "            'insert into %s (%s) values (?)' % (table, field), (value,)\n",
    "        )\n",
    "        return cur.lastrowid\n",
    "\n",
    "    def add_to_index(self, url, soup):\n",
    "        \"\"\"Store the position of every word of the parsed page ``soup`` for ``url``.\n",
    "\n",
    "        Does nothing when the URL is already indexed. Words found in the\n",
    "        module-level ``ignorewords`` are not stored, but they still advance\n",
    "        the position counter.\n",
    "        \"\"\"\n",
    "        if self.is_indexed(url):\n",
    "            return\n",
    "        print('Indexing %s' % url)\n",
    "\n",
    "        # Get the individual words\n",
    "        words = self.separate_words(self.get_text_only(soup))\n",
    "\n",
    "        # Get the id of URL\n",
    "        url_id = self.get_entry_id('urllist', 'url', url)\n",
    "\n",
    "        # Link each word to this url\n",
    "        for location, word in enumerate(words):\n",
    "            if word in ignorewords:\n",
    "                continue\n",
    "            word_id = self.get_entry_id('wordlist', 'word', word)\n",
    "            self.con.execute(\n",
    "                'insert into wordlocation(urlid,wordid,location) values (?,?,?)',\n",
    "                (url_id, word_id, location),\n",
    "            )\n",
    "\n",
    "    def get_text_only(self, soup):\n",
    "        \"\"\"Return the text of ``soup`` with the markup removed.\n",
    "\n",
    "        A node whose ``.string`` is set yields that string stripped of\n",
    "        surrounding whitespace; otherwise the text of every child is\n",
    "        collected recursively, each followed by a newline.\n",
    "        \"\"\"\n",
    "        text = soup.string\n",
    "        if text is not None:\n",
    "            return text.strip()\n",
    "        return ''.join(self.get_text_only(child) + '\\n' for child in soup.contents)\n",
    "\n",
    "    def separate_words(self, text):\n",
    "        \"\"\"Split ``text`` on non-word characters and return the lower-cased words.\"\"\"\n",
    "        return [s.lower() for s in self._word_splitter.split(text) if s != '']\n",
    "\n",
    "    def is_indexed(self, url):\n",
    "        \"\"\"Return True when ``url`` is in ``urllist`` and has at least one ``wordlocation`` row.\"\"\"\n",
    "        row = self.con.execute('select rowid from urllist where url=?', (url,)).fetchone()\n",
    "        if row is None:\n",
    "            return False\n",
    "        hit = self.con.execute(\n",
    "            'select 1 from wordlocation where urlid=? limit 1', (row[0],)\n",
    "        ).fetchone()\n",
    "        return hit is not None\n",
    "\n",
    "    def add_link_ref(self, urlFrom, urlTo, linkText):\n",
    "        \"\"\"Record a link from ``urlFrom`` to ``urlTo`` and the words of its text.\n",
    "\n",
    "        Both URLs are added to ``urllist`` when missing. A link from a page\n",
    "        to itself is not stored. Words found in ``ignorewords`` are skipped.\n",
    "        \"\"\"\n",
    "        words = self.separate_words(linkText)\n",
    "        from_id = self.get_entry_id('urllist', 'url', urlFrom)\n",
    "        to_id = self.get_entry_id('urllist', 'url', urlTo)\n",
    "        if from_id == to_id:\n",
    "            return\n",
    "        cur = self.con.execute(\n",
    "            'insert into link(fromid, toid) values (?, ?)', (from_id, to_id)\n",
    "        )\n",
    "        link_id = cur.lastrowid\n",
    "        for word in words:\n",
    "            if word in ignorewords:\n",
    "                continue\n",
    "            word_id = self.get_entry_id('wordlist', 'word', word)\n",
    "            self.con.execute(\n",
    "                'insert into linkwords(linkid,wordid) values (?,?)', (link_id, word_id)\n",
    "            )\n",
    "\n",
    "    def crawl(self, pages, depth=2):\n",
    "        \"\"\"Breadth-first crawl starting from ``pages``, indexing pages as it goes.\n",
    "\n",
    "        Each of the ``depth`` rounds fetches every page of the current\n",
    "        frontier, indexes it, records its outgoing links and collects the\n",
    "        not-yet-indexed http(s) targets as the next frontier. Pages that\n",
    "        cannot be fetched are reported and skipped. The database is\n",
    "        committed after every page.\n",
    "        \"\"\"\n",
    "        for i in range(depth):\n",
    "            print('depth %d begins' % i)\n",
    "            new_pages = set()\n",
    "            for page in pages:\n",
    "                try:\n",
    "                    # The with-block closes the HTTP response once it is read\n",
    "                    with urllib.request.urlopen(page) as response:\n",
    "                        html = response.read()\n",
    "                except Exception:\n",
    "                    # Not a bare except: Ctrl-C must still stop the crawl\n",
    "                    print('Could not open %s' % page)\n",
    "                    continue\n",
    "                soup = BeautifulSoup(html, 'lxml')\n",
    "                self.add_to_index(page, soup)\n",
    "\n",
    "                for link in soup('a'):\n",
    "                    if 'href' not in link.attrs:\n",
    "                        continue\n",
    "                    url = urllib.parse.urljoin(page, link['href'])\n",
    "                    # URLs containing a quote have always been skipped; kept\n",
    "                    # so that the set of crawled pages does not change\n",
    "                    if \"'\" in url:\n",
    "                        continue\n",
    "                    url = url.split('#')[0]  # remove location portion\n",
    "                    if url[0:4] == 'http' and not self.is_indexed(url):\n",
    "                        new_pages.add(url)\n",
    "                    self.add_link_ref(page, url, self.get_text_only(link))\n",
    "                self.db_commit()\n",
    "            pages = new_pages\n",
    "\n",
    "    def create_index_tables(self):\n",
    "        \"\"\"Create the index tables and their indexes, then commit.\n",
    "\n",
    "        Every statement uses ``if not exists``, so calling this on a\n",
    "        database that already has the schema is a no-op instead of an error.\n",
    "        \"\"\"\n",
    "        statements = (\n",
    "            'create table if not exists urllist(url)',\n",
    "            'create table if not exists wordlist(word)',\n",
    "            'create table if not exists wordlocation(urlid,wordid,location)',\n",
    "            'create table if not exists link(fromid integer,toid integer)',\n",
    "            'create table if not exists linkwords(wordid,linkid)',\n",
    "            'create index if not exists wordidx on wordlist(word)',\n",
    "            'create index if not exists urlidx on urllist(url)',\n",
    "            'create index if not exists wordurlidx on wordlocation(wordid)',\n",
    "            'create index if not exists urltoidx on link(toid)',\n",
    "            'create index if not exists urlfromidx on link(fromid)',\n",
    "        )\n",
    "        for statement in statements:\n",
    "            self.con.execute(statement)\n",
    "        self.db_commit()\n",
    "\n",
    "    def calculate_pagerank(self, iterations=20):\n",
    "        \"\"\"Rebuild the ``pagerank`` table with a score for every URL.\n",
    "\n",
    "        Every URL starts at 1.0. Each iteration sets a page's score to\n",
    "        ``0.15 + 0.85 * sum(score(linker) / outlinks(linker))`` over the\n",
    "        distinct pages linking to it. Scores are updated in place, so pages\n",
    "        handled later in an iteration already see the new scores of earlier\n",
    "        ones. The database is committed after each iteration.\n",
    "        \"\"\"\n",
    "        # clear out the current PageRank tables\n",
    "        self.con.execute('drop table if exists pagerank')\n",
    "        self.con.execute('create table pagerank(urlid primary key, score)')\n",
    "\n",
    "        # Initialize every url with a pagerank of 1\n",
    "        self.con.execute('insert into pagerank select rowid, 1.0 from urllist')\n",
    "        self.db_commit()\n",
    "\n",
    "        # urllist is not modified below, so read the ids once\n",
    "        url_ids = [row[0] for row in self.con.execute('select rowid from urllist')]\n",
    "\n",
    "        for i in range(iterations):\n",
    "            print('Iteration %d' % i)\n",
    "            for url_id in url_ids:\n",
    "                pr = 0.15\n",
    "\n",
    "                # Loop through all the pages that link to this one\n",
    "                linkers = self.con.execute(\n",
    "                    'select distinct fromid from link where toid=?', (url_id,)\n",
    "                ).fetchall()\n",
    "                for (linker,) in linkers:\n",
    "                    # Get the pagerank of the linker\n",
    "                    linking_pr = self.con.execute(\n",
    "                        'select score from pagerank where urlid=?', (linker,)\n",
    "                    ).fetchone()[0]\n",
    "\n",
    "                    # Get the total number of links from the linker\n",
    "                    linking_count = self.con.execute(\n",
    "                        'select count(*) from link where fromid=?', (linker,)\n",
    "                    ).fetchone()[0]\n",
    "                    pr += 0.85 * (linking_pr / linking_count)\n",
    "                # Bind the float: formatting it with %f would round the\n",
    "                # score to six decimals on every iteration\n",
    "                self.con.execute(\n",
    "                    'update pagerank set score=? where urlid=?', (pr, url_id)\n",
    "                )\n",
    "            self.db_commit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Open (or create) the on-disk index database\n",
    "crawler = Crawler(db_name='searchindex.db')\n",
    "# Set up the schema; needed the first time the database file is used\n",
    "crawler.create_index_tables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "depth 0 begins\n",
      "Indexing http://www.bilibili.com\n",
      "{'http://www.bilibili.com/video/ent-handmake-1.html', 'http://www.bilibili.com/video/part-twoelement-1.html', 'http://weibo.com/bilibiliweb', 'http://www.bilibili.com/video/game.html', 'http://www.bilibili.com/video/music-Cover-1.html', 'http://www.bilibili.com/event', 'http://zb.bilibili.com/', 'http://big.bilibili.com/site/big.html', 'http://www.bilibili.com/video/music-perform-1.html', 'http://bml.bilibili.com/index2016/?nav', 'http://www.bilibili.com/video/douga-kichiku-1.html', 'http://live.bilibili.com/i', 'http://www.bilibili.com/video/music-original-1.html', 'http://www.bilibili.com/video/life.html', 'http://space.bilibili.com/', 'http://www.bilibili.com/video/ent-circle-1.html', 'http://activity.bilibili.com', 'https://account.bilibili.com/login', 'http://www.bilibili.com/video/ad-ad-1.html', 'http://bangumi.bilibili.com/anime/timeline', 'http://www.bilibili.com/video/ent-animal-1.html', 'http://www.bilibili.com/video/fashion-info-1.html', 'https://pay.bilibili.com/', 'http://www.bilibili.com/video/ent.html', 'http://yoo.bilibili.com/index.html', 'http://www.bilibili.com/video/douga-voice-1.html', 'http://www.bilibili.com/video/dance-demo-1.html', 'http://www.bilibili.com/video/ent-food-1.html', 'http://www.bilibili.com/html/aboutUs.html', 'http://www.bilibili.com/video/dance.html', 'http://www.bilibili.com/video/music-vocaloid-1.html', 'http://www.bilibili.com/video/three-dimension-dance-1.html', 'http://www.bilibili.com/video/ent-Kichiku-1.html', 'http://www.shjbzx.cn', 'http://member.bilibili.com/v/', 'http://www.bilibili.com/video/kichiku-manual_vocaloid-1.html', 'http://www.bilibili.com/', 'http://www.bilibili.com/random', 'http://www.bilibili.com/video/game-mugen-1.html', 'http://www.bilibili.com/video/online.html', 'https://account.bilibili.com/site/home', 'http://live.bilibili.com/otaku', 'http://www.bilibili.com/video/music-game-1.html', 'http://www.bilibili.com/ranking', 'http://www.bilibili.com/html/cele.html', 
'http://member.bilibili.com/v/video/submit.html', 'http://bangumi.bilibili.com/movie/', 'http://www.bilibili.com/html/help.html', 'http://planet2017.bilibili.com/', 'http://www.bilibili.com/video/fashion.html', 'http://www.bilibili.com/video/technology.html', 'http://live.bilibili.com/sing-dance', 'http://live.bilibili.com/movie', 'http://www.bilibili.com/html/friends-links.html', 'http://www.bilibili.com/video/tech-future-military-1.html', 'http://www.bilibili.com/html/contact.html', 'http://www.bilibili.com/video/bangumi-two-1.html', 'https://www.bilibili.com/register', 'http://www.bilibili.com/video/ent-korea-1.html', 'http://www.bilibili.com/video/ent_funny_1.html', 'http://www.bilibili.com/video/fashion-clothing-1.html', 'http://www.bilibili.com/video/music-coordinate-1.html', 'http://message.bilibili.com', 'http://live.bilibili.com/single', 'http://www.bilibili.com/html/copyright.html', 'http://news.bilibili.com', 'http://www.bilibili.com/video/music.html', 'http://www.bilibili.com/video/tech-future-digital-1.html', 'http://bangumi.bilibili.com/anime/index', 'http://live.bilibili.com/e-sports', 'http://link.acg.tv', 'http://activity.bilibili.com/', 'http://www.bilibili.com/video/ent-sports-1.html', 'http://bangumi.bilibili.com/22/', 'http://www.bilibili.com/video/game-video-1.html', 'http://www.bilibili.com/mango', 'http://www.bilibili.com/video/music-collection-1.html', 'https://account.bilibili.com/login?act=exit', 'http://www.bilibili.com/video/bagumi_offical_1.html', 'http://www.bilibili.com/newlist.html', 'http://www.bilibili.com/video/tech-future-other-1.html', 'http://www.bilibili.com/video/av120040/', 'http://www.bilibili.com/video/fashion-body-1.html', 'http://live.bilibili.com/mobile-game', 'http://www.bilibili.com/square', 'http://www.bilibili.com/video/game-ctary-network-1.html', 'http://zb.bilibili.com', 'http://www.bilibili.com/video/douga.html', 'http://www.bilibili.com/video/douga-mad-1.html', 'http://game.bilibili.com/', 
'http://live.bilibili.com/subject', 'http://www.bilibili.com/video/ent-variety-1.html', 'http://www.bilibili.com/account/dynamic', 'http://www.bilibili.com/video/music-oped-1.html', 'http://h.bilibili.com/wallpaper?action=list', 'http://www.bilibili.com/video/ent-painting-1.html', 'http://live.bilibili.com/online', 'http://www.bilibili.com/video/kichiku.html', 'http://www.bilibili.com/video/tech-wild-1.html', 'http://www.bilibili.com/video/speech-course-1.html', 'http://www.bilibili.com/video/tech-popular-science-1.html', 'http://live.bilibili.com/mobile', 'http://www.bilibili.com/account/history', 'http://www.bilibili.com/video/douga-else-information-1.html', 'http://live.bilibili.com/draw', 'http://app.bilibili.com/', 'http://www.bilibili.com/video/ent-life-1.html', 'http://www.bilibili.com/html/join.html', 'http://www.bilibili.com/video/fashion-makeup-fitness-1.html', 'http://bangumi.bilibili.com/33/', 'http://h.bilibili.com/', 'http://www.bilibili.com/video/douga-mmd-1.html', 'http://www.bilibili.com/video/kichiku-course-1.html', 'http://www.bilibili.com/video/gmv-1.html', 'http://www.bilibili.com/video/bangumi_chinese_1.html', 'http://game.bilibili.com', 'http://bangumi.bilibili.com/tv/', 'http://www.bilibili.com/video/douga-else-1.html', 'http://live.bilibili.com', 'https://account.bilibili.com/answer/addq', 'http://app.bilibili.com', 'http://www.bilibili.com/video/tech-fun-1.html', 'http://www.bilibili.com/video/dance-1.html'}\n",
      "depth 1 begins\n",
      "Indexing http://www.bilibili.com/video/ent-handmake-1.html\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "//anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:69: FutureWarning: split() requires a non-empty pattern match.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Indexing http://www.bilibili.com/video/part-twoelement-1.html\n",
      "Indexing http://weibo.com/bilibiliweb\n",
      "Indexing http://www.bilibili.com/video/game.html\n",
      "Indexing http://www.bilibili.com/video/music-Cover-1.html\n",
      "Indexing http://www.bilibili.com/event\n",
      "Indexing http://zb.bilibili.com/\n",
      "Indexing http://big.bilibili.com/site/big.html\n",
      "Indexing http://www.bilibili.com/video/music-perform-1.html\n",
      "Indexing http://bml.bilibili.com/index2016/?nav\n",
      "Indexing http://www.bilibili.com/video/douga-kichiku-1.html\n",
      "Indexing http://live.bilibili.com/i\n",
      "Indexing http://www.bilibili.com/video/music-original-1.html\n",
      "Indexing http://www.bilibili.com/video/life.html\n",
      "Indexing http://space.bilibili.com/\n",
      "Indexing http://www.bilibili.com/video/ent-circle-1.html\n",
      "Indexing http://activity.bilibili.com\n",
      "Indexing https://account.bilibili.com/login\n",
      "Indexing http://www.bilibili.com/video/ad-ad-1.html\n",
      "Indexing http://bangumi.bilibili.com/anime/timeline\n",
      "Indexing http://www.bilibili.com/video/ent-animal-1.html\n",
      "Indexing http://www.bilibili.com/video/fashion-info-1.html\n",
      "Indexing https://pay.bilibili.com/\n",
      "Indexing http://www.bilibili.com/video/ent.html\n",
      "Indexing http://yoo.bilibili.com/index.html\n",
      "Indexing http://www.bilibili.com/video/douga-voice-1.html\n",
      "Indexing http://www.bilibili.com/video/dance-demo-1.html\n",
      "Indexing http://www.bilibili.com/video/ent-food-1.html\n",
      "Indexing http://www.bilibili.com/html/aboutUs.html\n",
      "Indexing http://www.bilibili.com/video/dance.html\n",
      "Indexing http://www.bilibili.com/video/music-vocaloid-1.html\n",
      "Indexing http://www.bilibili.com/video/three-dimension-dance-1.html\n",
      "Indexing http://www.bilibili.com/video/ent-Kichiku-1.html\n",
      "Indexing http://www.shjbzx.cn\n",
      "Indexing http://member.bilibili.com/v/\n",
      "Indexing http://www.bilibili.com/video/kichiku-manual_vocaloid-1.html\n",
      "Indexing http://www.bilibili.com/\n",
      "Indexing http://www.bilibili.com/random\n",
      "Indexing http://www.bilibili.com/video/game-mugen-1.html\n",
      "Indexing http://www.bilibili.com/video/online.html\n",
      "Indexing https://account.bilibili.com/site/home\n",
      "Indexing http://live.bilibili.com/otaku\n",
      "Indexing http://www.bilibili.com/video/music-game-1.html\n",
      "Indexing http://www.bilibili.com/ranking\n",
      "Indexing http://www.bilibili.com/html/cele.html\n",
      "Indexing http://member.bilibili.com/v/video/submit.html\n",
      "Indexing http://bangumi.bilibili.com/movie/\n",
      "Indexing http://www.bilibili.com/html/help.html\n",
      "Indexing http://planet2017.bilibili.com/\n",
      "Indexing http://www.bilibili.com/video/fashion.html\n",
      "Indexing http://www.bilibili.com/video/technology.html\n",
      "Indexing http://live.bilibili.com/sing-dance\n",
      "Indexing http://live.bilibili.com/movie\n",
      "Indexing http://www.bilibili.com/html/friends-links.html\n",
      "Indexing http://www.bilibili.com/video/tech-future-military-1.html\n",
      "Indexing http://www.bilibili.com/html/contact.html\n",
      "Indexing http://www.bilibili.com/video/bangumi-two-1.html\n",
      "Indexing https://www.bilibili.com/register\n",
      "Indexing http://www.bilibili.com/video/ent-korea-1.html\n",
      "Indexing http://www.bilibili.com/video/ent_funny_1.html\n",
      "Indexing http://www.bilibili.com/video/fashion-clothing-1.html\n",
      "Indexing http://www.bilibili.com/video/music-coordinate-1.html\n",
      "Indexing http://message.bilibili.com\n",
      "Indexing http://live.bilibili.com/single\n",
      "Indexing http://www.bilibili.com/html/copyright.html\n",
      "Indexing http://news.bilibili.com\n",
      "Indexing http://www.bilibili.com/video/music.html\n",
      "Indexing http://www.bilibili.com/video/tech-future-digital-1.html\n",
      "Indexing http://bangumi.bilibili.com/anime/index\n",
      "Indexing http://live.bilibili.com/e-sports\n",
      "Indexing http://link.acg.tv\n",
      "Indexing http://activity.bilibili.com/\n",
      "Indexing http://www.bilibili.com/video/ent-sports-1.html\n",
      "Indexing http://bangumi.bilibili.com/22/\n",
      "Indexing http://www.bilibili.com/video/game-video-1.html\n",
      "Indexing http://www.bilibili.com/mango\n",
      "Indexing http://www.bilibili.com/video/music-collection-1.html\n",
      "Indexing https://account.bilibili.com/login?act=exit\n",
      "Indexing http://www.bilibili.com/video/bagumi_offical_1.html\n",
      "Indexing http://www.bilibili.com/newlist.html\n",
      "Indexing http://www.bilibili.com/video/tech-future-other-1.html\n",
      "Indexing http://www.bilibili.com/video/av120040/\n",
      "Indexing http://www.bilibili.com/video/fashion-body-1.html\n",
      "Indexing http://live.bilibili.com/mobile-game\n",
      "Indexing http://www.bilibili.com/square\n",
      "Indexing http://www.bilibili.com/video/game-ctary-network-1.html\n",
      "Indexing http://zb.bilibili.com\n",
      "Indexing http://www.bilibili.com/video/douga.html\n",
      "Indexing http://www.bilibili.com/video/douga-mad-1.html\n",
      "Indexing http://game.bilibili.com/\n",
      "Indexing http://live.bilibili.com/subject\n",
      "Indexing http://www.bilibili.com/video/ent-variety-1.html\n",
      "Indexing http://www.bilibili.com/account/dynamic\n",
      "Indexing http://www.bilibili.com/video/music-oped-1.html\n",
      "Indexing http://h.bilibili.com/wallpaper?action=list\n",
      "Indexing http://www.bilibili.com/video/ent-painting-1.html\n",
      "Indexing http://live.bilibili.com/online\n",
      "Indexing http://www.bilibili.com/video/kichiku.html\n",
      "Indexing http://www.bilibili.com/video/tech-wild-1.html\n",
      "Indexing http://www.bilibili.com/video/speech-course-1.html\n",
      "Indexing http://www.bilibili.com/video/tech-popular-science-1.html\n",
      "Indexing http://live.bilibili.com/mobile\n",
      "Indexing http://www.bilibili.com/account/history\n",
      "Indexing http://www.bilibili.com/video/douga-else-information-1.html\n",
      "Indexing http://live.bilibili.com/draw\n",
      "Indexing http://app.bilibili.com/\n",
      "Indexing http://www.bilibili.com/video/ent-life-1.html\n",
      "Indexing http://www.bilibili.com/html/join.html\n",
      "Indexing http://www.bilibili.com/video/fashion-makeup-fitness-1.html\n",
      "Indexing http://bangumi.bilibili.com/33/\n",
      "Indexing http://h.bilibili.com/\n",
      "Indexing http://www.bilibili.com/video/douga-mmd-1.html\n",
      "Indexing http://www.bilibili.com/video/kichiku-course-1.html\n",
      "Indexing http://www.bilibili.com/video/gmv-1.html\n",
      "Indexing http://www.bilibili.com/video/bangumi_chinese_1.html\n",
      "Indexing http://game.bilibili.com\n",
      "Indexing http://bangumi.bilibili.com/tv/\n",
      "Indexing http://www.bilibili.com/video/douga-else-1.html\n",
      "Indexing http://live.bilibili.com\n",
      "Indexing https://account.bilibili.com/answer/addq\n",
      "Indexing http://app.bilibili.com\n",
      "Indexing http://www.bilibili.com/video/tech-fun-1.html\n",
      "Indexing http://www.bilibili.com/video/dance-1.html\n",
      "{'http://h.bilibili.com/dy190470', 'https://itunes.apple.com/cn/app/bi-li-bi-li-dong-huahd/id1093486973', 'http://link.acg.tv/home.php?mod=space&username=%E6%9C%88%E7%81%AFdesu', 'http://www.bilibili.com/video/part-twoelement-1.html', 'https://big.bilibili.com/site/big.html', 'http://bangumi.bilibili.com/anime/5773', 'http://www.bilibili.com/video/av7360943', 'http://www.bilibili.com/blackboard/activity-outfit01.html', 'http://docs.bilibili.cn/wiki/%E5%88%86%E7%B1%BB:Script', 'http://news.bilibili.com/news/34', 'http://live.bilibili.com/news', 'http://space.bilibili.com/76862108', 'http://acg.tv/u1i8', 'http://h.bilibili.com/member?mod=space&uid=25085312&act=p_index', 'http://h.bilibili.com/dy189496', 'http://b-gift.biligame.com', 'http://www.bilibili.com/video/av7740365', 'http://www.bilibili.com/newlist.html?page=4', 'http://h.bilibili.com/dy191705', 'http://www.bilibili.com/video/music-Cover-1.html', 'http://link.acg.tv/home.php?mod=space&username=%E9%9B%B6%E4%BB%B6%E5%96%B5', 'http://h.bilibili.com/dy192083', 'http://h.bilibili.com/member?mod=space&uid=2194628&act=p_index', 'http://big.bilibili.com/site/big.html', 'http://h.bilibili.com/dy183781', 'http://blhx.biligame.com/', 'http://h.bilibili.com/dy188860', 'http://www.bilibili.com/video/av7797965', 'http://acg.tv/u1fg', 'http://www.bilibili.com/video/av7863217', 'http://oxmhj.biligame.com/', 'https://www.bilibili.com/register/mail', 'http://www.bilibili.com/video/life.html', 'http://www.shjbzx.cn/jbpt/n57/n63/u1ai1816.html', 'http://www.bilibili.com/video/tv-sfx-1.html', 'http://www.bilibili.com/video/av7882980', 'http://h.bilibili.com/dy185687', 'http://bangumi.bilibili.com/anime/5615/play', 'http://www.bilibili.com/video/av7906781/', 'http://h.bilibili.com/member?mod=space&uid=284424&act=p_index', 'http://h.bilibili.com/member?mod=space&uid=56829276&act=p_index', 'http://live.bilibili.com/association', 'http://www.bilibili.com/newlist.html?page=10', 'http://www.sgamer.com/', 
'http://www.bilibili.com/video/av7906688/', 'https://account.bilibili.com/login', 'http://h.bilibili.com/dy192022', 'https://live.bilibili.com/i', 'http://bbs.biligame.com/forum.php?mod=forumdisplay&fid=146', 'http://www.bilibili.com/video/av7763752', 'http://www.bilibili.com/video/ent-animal-1.html', 'http://www.bilibili.com/video/fashion-info-1.html', 'http://www.bilibili.com/video/av7838115', 'http://boli.biligame.com/', 'http://www.bilibili.com/video/ent.html', 'http://space.bilibili.com/13645841', 'http://www.bilibili.com/event?page=2', 'http://live.bilibili.com/liveHime', 'http://bangumi.bilibili.com/anime/5777', 'http://www.bilibili.com/video/ent-food-1.html', 'http://www.bilibili.com/video/av7906647/', 'https://message.bilibili.com', 'http://www.bilibili.com/video/av7852097', 'http://activity.hdslb.com/zzjs/copyright/%E9%99%84%E5%BD%95%EF%BC%9A%E6%81%A2%E5%A4%8D%E9%93%BE%E6%8E%A5%E7%94%B3%E8%AF%89%E8%A1%A8.doc', 'http://sjsn.biligame.com/', 'http://www.bilibili.com/video/music-vocaloid-1.html', 'http://www.bilibili.com/video/av7332655', 'http://www.bilibili.com/video/tech-fun-1.html', 'http://h.bilibili.com/member?mod=space&uid=38245638&act=p_index', 'http://member.bilibili.com/v/', 'http://www.bilibili.com/newlist.html?typeid=154', 'http://yoo.bilibili.com/newlist.html', 'http://link.acg.tv/home.php?mod=space&username=%E7%82%92%E8%82%A1%E7%82%92%E6%88%90%E7%A5%9E%E7%BB%8F%E7%97%85', 'http://search.bilibili.com/drawyoo?keyword=%E4%B8%9C%E6%96%B9', 'http://www.bilibili.com/html/join-list.html', 'http://live.bilibili.com/news/detail/52', 'http://activity.bilibili.com/activity/71', 'https://space.bilibili.com/', 'https://passport.bilibili.com/login', 'https://www.microsoft.com/store/apps/9nblggh5q5fv', 'https://www.bilibili.com/register/licence', 'http://www.bilibili.com/video/av7906741/', 'https://game.bilibili.com/', 'http://www.bilibili.com/video/av7881570', 'http://www.bilibili.com/blackboard/activity-madoka.html', 'http://bmall.bilibili.com', 
'http://www.bilibili.com/blackboard/activity-planet2017-m.html', 'http://www.bilibili.com/video/movie_chinese_1.html', 'http://link.acg.tv/home.php?mod=space&username=Mousssss', 'https://www.bilibili.com/html/join.html', 'http://bml.bilibili.com/macro2016/', 'http://www.bilibili.com/video/av7906659/', 'http://99j.biligame.com/', 'http://ydsnywl.biligame.com/', 'http://www.bilibili.com/video/online.html', 'http://www.bilibili.com/video/av7852304', 'http://www.bilibili.com/html/cele.html', 'http://h.bilibili.com/member?mod=space&uid=21540303&act=p_index', 'http://bangumi.bilibili.com/movie/', 'http://www.bilibili.com/html/html5player.html?aid=3521416&cid=6041635', 'http://www.bilibili.com/video/technology.html', 'http://live.bilibili.com/movie', 'http://kfq.biligame.com/', 'http://bangumi.bilibili.com/anime/5025', 'http://h.bilibili.com/dy192016', 'http://h.bilibili.com/dy189364', 'http://www.bilibili.com/video/tech-future-military-1.html', 'http://link.acg.tv/home.php?mod=space&username=%E4%BA%91%E6%95%A3%E5%90%8E%E7%9A%84%E6%97%A5%E5%85%89%E9%9B%A8', 'http://live.bilibili.com/rank', 'http://www.bilibili.com/video/av7906690/', 'http://h.bilibili.com/dy190980', 'http://www.bilibili.com/list/b--a--t----d---1.html', 'http://www.bilibili.com/newlist.html?typeid=20', 'http://www.bilibili.com/video/av7904363', 'http://live.bilibili.com/', 'https://member.bilibili.com/v/video/submit.html', 'http://www.shjbzx.cn/jbpt/n57/n63/u1ai1450.html', 'http://www.bilibili.com/video/av7906643/', 'http://h.bilibili.com/dy191332', 'http://h.bilibili.com/dy191385', 'http://www.bilibili.com/video/av7519267', 'http://h.bilibili.com/dy191560', 'http://h.bilibili.com/dy192033', 'http://www.bilibili.com/video/av7432955', 'http://search.bilibili.com/drawyoo?keyword=SAI', 'http://www.bilibili.com/video/ent_funny_1.html', 'https://zb.bilibili.com', 'http://message.bilibili.com', 'http://live.bilibili.com/single', 'http://www.bilibili.com/newlist.html?typeid=138', 'http://h.bilibili.com/dy191277', 
'http://bangumi.bilibili.com/anime/index', 'http://link.acg.tv/forum.php?mod=forumdisplay&fid=42', 'http://acg.tv/u1fu', 'http://activity.bilibili.com/', 'http://www.bilibili.com/video/teleplay.html', 'http://h.bilibili.com/member?mod=space&uid=10043049&act=p_index', 'http://h.bilibili.com/member?mod=space&uid=5075399&act=p_index', 'http://www.bilibili.com/blackboard/activity-2017lantern.html', 'http://bangumi.bilibili.com/anime/5615', 'http://yys.biligame.com/', 'http://h.bilibili.com/member?mod=space&uid=21857&act=p_index', 'http://h.bilibili.com/rank/24', 'http://www.bilibili.com/video/av7867760', 'http://www.bilibili.com/newlist.html?typeid=130', 'http://live.bilibili.com/news/detail/326', 'http://h.bilibili.com/dy192094', 'http://h.bilibili.com/member?mod=space&uid=1310264&act=p_index', 'https://account.bilibili.com/login?act=exit', 'http://space.bilibili.com/1328260/', 'http://www.bilibili.com/video/av120040/', 'http://bangumi.bilibili.com/anime/2543/play', 'http://h.bilibili.com/dy190035', 'http://www.bilibili.com/newlist.html?typeid=17', 'http://www.bilibili.com/video/av7906638/', 'http://www.bilibili.com/square', 'http://link.acg.tv/forum.php?mod=forumdisplay&fid=36', 'http://ntdgm.biligame.com/', 'http://www.bilibili.com/video/av7905463', 'http://search.bilibili.com/drawyoo?keyword=%E7%BB%83%E4%B9%A0', 'http://live.bilibili.com/102', 'http://h.bilibili.com/dy186096', 'http://h.bilibili.com/dy183880', 'http://bangumi.bilibili.com/anime/5776/play', 'http://www.bilibili.com/video/douga-mad-1.html', 'http://h.bilibili.com/member?mod=space&uid=7627492&act=p_index', 'http://link.acg.tv/forum.php?mod=guide&view=new', 'http://link.acg.tv/home.php?mod=space&username=%E3%81%A4%E8%BD%BB%E5%90%BB%E5%A4%A9%E8%BE%B9%E7%9A%84%E9%82%A3%E6%9C%B5%E7%99%BD%E4%BA%91', 'http://live.bilibili.com/subject', 'http://bangumi.bilibili.com/anime/5790', 'http://www.bilibili.com/account/dynamic', 'http://bangumi.bilibili.com/anime/5792', 'https://pay.bilibili.com/withdraw_in.html', 
'http://www.bilibili.com/html/zt.html', 'http://www.bilibili.com/video/av7906750/', 'http://www.bilibili.com/newlist.html?typeid=31', 'http://space.bilibili.com/15604841', 'http://h.bilibili.com/dy190081', 'http://teos2.biligame.com/', 'http://www.bilibili.com/video/ent-painting-1.html', 'http://bizhi.sogou.com', 'https://obsproject.com/', 'http://www.bilibili.com/video/speech-course-1.html', 'http://live.bilibili.com/mobile', 'http://www.bilibili.com/account/history', 'http://www.bilibili.com/video/douga-else-information-1.html', 'http://bbs.biligame.com/forum.php?mod=forumdisplay&fid=150', 'http://h.bilibili.com/dy174094', 'http://bangumi.bilibili.com/anime/5778/play', 'http://madoka.biligame.com/', 'http://nono.biligame.com/yuyue/', 'http://b-gift.biligame.com/', 'http://www.bilibili.com/video/gmv-1.html', 'http://game.bilibili.com', 'http://bangumi.bilibili.com/tv/', 'http://www.bilibili.com/video/av7906618/', 'http://link.acg.tv/forum.php?mod=announcement&id=2', 'http://www.bilibili.com/video/av7906637/', 'http://h.bilibili.com/dy186992', 'http://www.windowsphone.com/zh-cn/store/app/%E5%93%94%E5%93%A9%E5%93%94%E5%93%A9%E5%8A%A8%E7%94%BB/75620dee-4a7a-4dae-8677-0d930e05f57e', 'http://h.bilibili.com/dy192036', 'http://link.acg.tv/forum.php', 'http://bbs.biligame.com', 'http://h.bilibili.com/dy191597', 'https://app.bilibili.com/', 'https://www.bilibili.com/login', 'http://bangumi.bilibili.com/anime/5523', 'http://www.bilibili.com/video/av7801856', 'http://h.bilibili.com/dy191715', 'http://www.bilibili.com/video/dance-1.html', 'http://www.bilibili.com/video/av4484996/', 'http://h.bilibili.com/member?mod=space&uid=26231129&act=p_index', 'http://yzr2.biligame.com/', 'http://czqst.biligame.com/', 'http://link.acg.tv/home.php?mod=space&username=%E5%84%AAP%E5%90%9B', 'http://comic.qq.com/', 'http://h.bilibili.com/member?mod=space&uid=11195621&act=p_index', 'http://www.bilibili.com/html/userdel_list.html', 'http://djsy.biligame.com/', 
'http://bangumi.bilibili.com/anime/5538', 'http://zion.biligame.com/', 'http://search.bilibili.com/drawyoo?keyword=%E6%91%B8%E9%B1%BC', 'http://bangumi.bilibili.com/anime/5851', 'http://www.bilibili.com/video/av7900189', 'http://h.bilibili.com/dy191514', 'http://bangumi.bilibili.com/anime/5774/play', 'http://bangumi.bilibili.com/anime/2543', 'http://zb.bilibili.com/', 'http://bangumi.bilibili.com/anime/5523/play', 'http://link.acg.tv/forum.php?mod=redirect&tid=908&goto=lastpost', 'http://www.bilibili.com/video/av7906628/', 'http://h.bilibili.com/member?mod=space&uid=11554855&act=p_index', 'http://h.bilibili.com/dy183821', 'https://account.bilibili.com/site/coin', 'http://www.bilibili.com/video/av7757974', 'http://ma.biligame.com', 'http://www.bilibili.com/video/douga-kichiku-1.html', 'http://space.bilibili.com/login', 'http://www.bilibili.com/newlist.html?typeid=122', 'http://h.bilibili.com/dy190382', 'http://h.bilibili.com/dy173680', 'http://h.bilibili.com/dy191853', 'http://space.bilibili.com/resetpwd', 'http://weibo.com/bilibiliapp', 'http://news.bilibili.com/news/31', 'http://space.bilibili.com/19474154', 'http://activity.bilibili.com', 'http://www.bilibili.com/html/activity-dayuhaitang.html', 'http://www.bilibili.com/video/ad-ad-1.html', 'http://h.bilibili.com/rank/7', 'http://www.bilibili.com/video/av7776235', 'http://bangumi.bilibili.com/anime/5563', 'http://www.bilibili.com/video/av7906767/', 'https://pay.bilibili.com/charge_alipay.html', 'http://search.bilibili.com/drawyoo?keyword=%E5%90%8C%E4%BA%BA', 'http://www.shjbzx.cn/jbpt/n57/n63/u1ai1820.html', 'http://moe.biligame.com/yuyue/', 'http://bbs.biligame.com/forum.php?mod=forumdisplay&fid=211', 'http://yoo.bilibili.com/index.html', 'https://www.bilibili.com/account/history', 'http://www.bilibili.com/video/av7906589/', 'http://www.bilibili.com/html/aboutUs.html', 'http://www.bilibili.com/video/dance.html', 'http://acg.tv/u1g0', 'http://www.bilibili.com/video/three-dimension-dance-1.html', 
'http://www.shjbzx.cn', 'http://security.bilibili.com/', 'http://link.acg.tv/home.php?mod=space&username=%E5%8F%B8%E5%8C%97%E5%90%91%E5%8D%97', 'http://search.bilibili.com/drawyoo?keyword=%E9%BB%91%E7%99%BD', 'http://h.bilibili.com/dy191836', 'http://h.bilibili.com/dy192026', 'http://h.bilibili.com/dy189637', 'http://bangumi.bilibili.com/anime/5791', 'http://activity.bilibili.com/list', 'http://h.bilibili.com/dy192084', 'http://h.bilibili.com/dy192041', 'http://www.bilibili.com/video/av7824846', 'http://h.bilibili.com/dy191884', 'http://bfzj.biligame.com/', 'http://h.bilibili.com/dy184457', 'http://h.bilibili.com/dy187377', 'http://www.bilibili.com/blackboard/activity_17bnj_folk.html', 'http://live.bilibili.com/news/detail/317', 'http://h.bilibili.com/dy190393', 'https://account.bilibili.com/resetpwd', 'https://account.bilibili.com/site/home', 'http://bbs.comicdd.com/', 'https://live.bilibili.com', 'http://www.bilibili.com/html/activity-2233birthday.html', 'http://www.bilibili.com/ranking', 'http://h.bilibili.com/dy191828', 'http://www.bilibili.com/video/av7906796/', 'http://member.bilibili.com/v/video/submit.html', 'http://www.bilibili.com/video/av7906588/', 'http://acg.tv/u1yP', 'http://h.bilibili.com/dy192096', 'http://live.bilibili.com/sing-dance', 'http://h.bilibili.com/dy192093', 'http://www.bilibili.com/html/friends-links.html', 'http://h.bilibili.com/dy191304', 'http://space.bilibili.com/928123', 'http://www.bilibili.com/video/av7906620/', 'http://www.bilibili.com/video/av7906583/', 'http://pan.baidu.com/s/1pJ608OV', 'https://www.bilibili.com/register', 'http://yoo.bilibili.com/video/online.html', 'http://acg.tv/u1iV', 'http://www.bilibili.com/video/av7805066', 'https://member.bilibili.com/v/', 'https://account.bilibili.com/answer/base', 'http://search.bilibili.com/drawyoo?keyword=%E5%8E%9A%E6%B6%82', 'https://passport.bilibili.com/site/site.html', 'http://h.bilibili.com/dy174768', 'http://www.bilibili.com/video/av7873285', 
'http://space.bilibili.com/44473221', 'http://h.bilibili.com/dy190200', 'http://www.shjbzx.cn/jbpt/n57/n63/u1ai1451.html', 'http://h.bilibili.com/dy192043', 'http://yjdyc.biligame.com/yxz/', 'http://h.bilibili.com/dy187916', 'http://www.bilibili.com/video/av7646229', 'http://www.shjbzx.cn/jbpt/n57/n63/u1ai1817.html', 'http://link.acg.tv/search.php?mod=forum&srchtxt=%E6%92%AD%E6%94%BE%E5%99%A8&formhash=e9b69355&searchsubmit=true&source=hotsearch', 'http://www.bilibili.com/video/av7906624/', 'http://pinyin.sogou.com/skins/?=bilibili', 'http://h.bilibili.com/member?mod=space&uid=3765648&act=p_index', 'http://h.bilibili.com/member?mod=space&uid=2928839&act=p_index', 'http://h.bilibili.com/member?mod=space&uid=3813813&act=p_index', 'https://www.bilibili.com/html/cele.html', 'http://bangumi.bilibili.com/anime/5521', 'http://www.bilibili.com/video/av7906773/', 'http://search.bilibili.com/drawyoo?keyword=MIKU', 'https://www.bilibili.com/html/contact.html', 'http://www.bilibili.com/newlist.html?typeid=124', 'http://live.bilibili.com/news/index/1/1', 'http://h.bilibili.com/member?mod=space&uid=38923836&act=p_index', 'http://h.bilibili.com/dy192085', 'http://www.bilibili.com/video/av7901285', 'https://shop151111231.taobao.com/', 'http://h.bilibili.com/dy189011', 'http://www.bilibili.com/newlist.html?page=8', 'http://www.bilibili.com/video/bagumi_offical_1.html', 'https://account.bilibili.com/site/setting', 'http://bangumi.bilibili.com/anime/5851/play', 'http://www.bilibili.com/newlist.html', 'http://www.bilibili.com/lottery', 'http://space.bilibili.com/283538', 'http://www.bilibili.com/video/fashion-body-1.html', 'http://bangumi.bilibili.com/anime/5510', 'http://link.acg.tv/forum.php?mod=redirect&tid=972&goto=lastpost', 'http://live.bilibili.com/mobile-game', 'http://h.bilibili.com/dy191287', 'http://h.bilibili.com/dy190277', 'http://www.bilibili.com/video/game-ctary-network-1.html', 'http://www.bilibili.com/video/av7772646', 'http://h.bilibili.com/dy190837', 
'https://link.acg.tv', 'http://link.acg.tv/forum.php?mod=redirect&tid=973&goto=lastpost', 'http://www.bilibili.com/video/av7906585/', 'http://www.bilibili.com/video/av7906654/', 'http://t.qq.com/bilibiliweb', 'http://www.shjbzx.cn/jbpt/n57/n63/u1ai1081.html', 'http://www.bilibili.com/video/ent-variety-1.html', 'http://hpjn.biligame.com/', 'http://h.bilibili.com/dy192091', 'http://h.bilibili.com/dy191525', 'http://www.bilibili.com/video/movie-movie-1.html', 'http://www.bilibili.com/video/av7820871', 'http://www.bilibili.com/video/tech-wild-1.html', 'http://link.acg.tv/home.php?mod=space&username=B%E7%AB%99%E7%99%BE%E7%A7%91%E5%85%A8%E4%B9%A6', 'http://xsqst.biligame.com/', 'http://www.bilibili.com/video/av7873772', 'http://www.bilibili.com/video/tech-popular-science-1.html', 'http://h.bilibili.com/list', 'http://www.bilibili.com/newlist.html?typeid=137', 'http://live.bilibili.com/all', 'https://www.bilibili.com/html/friends-links.html', 'http://h.bilibili.com/', 'http://www.bilibili.com/video/av7906737/', 'http://www.bilibili.com/video/av7906686/', 'http://h.bilibili.com/dy192056', 'http://h.bilibili.com/dy191995', 'http://www.bilibili.com/video/av7839025', 'http://h.bilibili.com/dy188668', 'http://www.bilibili.com/video/bangumi_chinese_1.html', 'http://news.bilibili.com/news/35', 'https://www.bilibili.com/video/av120040/', 'http://bangumi.bilibili.com/anime/5809', 'http://h.bilibili.com/dy190875', 'http://h.bilibili.com/dy188961', 'http://h.bilibili.com/member?mod=space&uid=65582242&act=p_index', 'https://itunes.apple.com/cn/app/bi-li-bi-li-dong-hua/id736536022', 'http://link.acg.tv/forum.php?mod=forumdisplay&fid=2', 'https://h.bilibili.com/', 'http://h.bilibili.com/dy191982', 'http://www.bilibili.com/html/activity-topimage-fgo.html', 'http://live.bilibili.com/224', 'http://www.bilibili.com/video/av7906615/', 'http://h.bilibili.com/member?mod=space&uid=309890&act=p_index', 'http://h.bilibili.com/dy191973', 'http://www.bilibili.com/video/tv-drama-1.html', 
'http://www.bilibili.com/newlist.html?typeid=75', 'http://space.bilibili.com/52806762', 'http://h.bilibili.com/member?mod=space&uid=16765&act=p_index', 'https://account.bilibili.com/site/home.html', 'http://search.bilibili.com/drawyoo?keyword=%E5%B0%91%E5%A5%B3', 'http://h.bilibili.com/member?mod=space&uid=33281667&act=p_index', 'http://h.bilibili.com/dy189100', 'http://bangumi.bilibili.com/anime/4771', 'http://www.bilibili.com/video/av7764715', 'http://www.bilibili.com/blackboard/activity-2016MMD.html', 'http://www.bilibili.com/video/music-perform-1.html', 'http://www.bilibili.com/video/av7770952', 'http://bml.bilibili.com/index2016/?nav', 'http://www.bilibili.com/video/av7549042', 'http://h.bilibili.com/member?mod=space&uid=1475031&act=p_index', 'http://h.bilibili.com/dy192031', 'http://dl.hdslb.com/mobile/latest/iBiliPlayer-bili.apk', 'http://activity.hdslb.com/zzjs/copyright/%E9%99%84%E5%BD%95%EF%BC%9A%E4%BE%B5%E6%9D%83%E7%94%B3%E8%AF%89%E8%A1%A8.doc', 'http://www.bilibili.com/video/av7906630/', 'http://www.shjbzx.cn/jbpt/n57/n63/u1ai1815.html', 'http://link.acg.tv/forum.php?mobile=yes', 'http://search.bilibili.com/drawyoo?keyword=%E4%B8%B4%E6%91%B9', 'http://space.bilibili.com/', 'http://www.shjbzx.cn/jbpt/n57/n63/u1ai1452.html', 'http://fgo.biligame.com/event_santa', 'http://www.bilibili.com/video/movie.html', 'http://www.bilibili.com/video/ent-circle-1.html', 'http://www.bilibili.com/video/av7244731', 'https://secure.bilibili.com/login?sns=qq', 'http://h.bilibili.com/dy191792', 'http://www.bilibili.com/video/av7906623/', 'http://bangumi.bilibili.com/anime/timeline', 'http://www.bilibili.com/video/av7906776/', 'https://account.bilibili.com/site/identification', 'http://www.bilibili.com/video/av7890887', 'http://news.bilibili.com/news/37', 'http://rgdsj.biligame.com/', 'http://www.bilibili.com/newlist.html?page=2', 'http://bbs.biligame.com/forum.php?mod=viewthread&tid=134549&extra=', 'http://live.bilibili.com/news/detail/325', 'http://www.discuz.net', 
'http://zcsmw.biligame.com/yuyue/', 'http://bangumi.bilibili.com/anime/5795', 'http://www.bilibili.com/video/av7906699/', 'http://bbs.biligame.com/forum.php?mod=forumdisplay&fid=139', 'http://www.bilibili.com/video/ent-Kichiku-1.html', 'https://h.bilibili.com/wallpaper?action=list', 'http://space.bilibili.com/8770537', 'http://link.acg.tv/forum.php?gid=1', 'http://activity.bilibili.com/activity/75', 'https://www.bilibili.com/', 'http://kf.biligame.com/', 'https://pay.bilibili.com/bb_balance.html', 'http://www.bilibili.com/video/kichiku-manual_vocaloid-1.html', 'http://h.bilibili.com/dy191826', 'http://www.bilibili.com/random', 'http://bbs.biligame.com/forum.php?mod=forumdisplay&fid=173', 'http://hywz.biligame.com/', 'http://www.biligame.com/', 'http://bangumi.bilibili.com/anime/5777/play', 'http://link.acg.tv/search.php?mod=forum&srchtxt=%E5%AE%A2%E6%88%B7%E7%AB%AF&formhash=e9b69355&searchsubmit=true&source=hotsearch', 'http://www.bilibili.com/video/tv-micromovie-1.html', 'http://www.bilibili.com/newlist.html?typeid=131', 'http://bbs.biligame.com/forum.php', 'http://live.bilibili.com/round', 'http://www.bilibili.com/video/bgm_calendar.html', 'http://link.acg.tv/forum.php?mod=redirect&tid=683&goto=lastpost', 'http://www.bilibili.com/video/av7853081', 'http://www.bilibili.com/html/live.bilibili.com/liveHime', 'http://link.acg.tv/bilibili_connect.php?mod=auth&op=login', 'http://bangumi.bilibili.com/anime/5786', 'http://bangumi.bilibili.com/moe/2016/jp/index', 'http://www.bilibili.com/video/av7906749/', 'http://www.bilibili.com/html/activity-VocaloidCover.html', 'http://member.bilibili.com/', 'https://zb.bilibili.com/', 'http://www.bilibili.com/html/help.html', 'http://h.bilibili.com/member?mod=space&act=p_person', 'http://planet2017.bilibili.com/', 'http://live.bilibili.com/hd/guide', 'http://member.bilibili.com/video_submit.html?tpl=upload', 'http://weibo.com/u/5245903632', 'http://www.bilibili.com/video/av7852648', 'http://www.bilibili.com/video/av7871280', 
'http://www.bilibili.com/video/bangumi-two-1.html', 'http://h.bilibili.com/member?mod=space&uid=14358664&act=p_index', 'http://live.bilibili.com/139', 'http://www.bilibili.com/register', 'https://account.bilibili.com/site/ident.html', 'http://www.bilibili.com/video/ent-korea-1.html', 'http://www.bilibili.com/video/fashion-clothing-1.html', 'http://search.bilibili.com/drawyoo?keyword=%E6%9D%BF%E7%BB%98', 'http://acg.tv/u1g3', 'http://bangumi.bilibili.com/anime/5542', 'http://link.acg.tv/forum.php?mod=redirect&tid=963&goto=lastpost', 'http://www.bilibili.com/html/copyright.html', 'http://acg.tv/u1js', 'http://h.bilibili.com/dy191855', 'http://live.bilibili.com/277', 'http://www.shjbzx.cn/jbpt/n57/n63/u1ai1819.html', 'http://www.bilibili.com/video/tech-future-digital-1.html', 'http://www.bilibili.com/newlist.html?page=7', 'http://search.bilibili.com/drawyoo?keyword=%E5%8E%9F%E5%88%9B', 'http://search.bilibili.com/drawyoo?keyword=%E9%BC%A0%E7%BB%98', 'http://search.bilibili.com/drawyoo?keyword=%E6%89%8B%E7%BB%98', 'http://www.bilibili.com/event?page=3', 'http://comic.qq.com/news/', 'http://bangumi.bilibili.com/moe/2016/cn/index', 'http://www.bilibili.com/video/ent-sports-1.html', 'http://www.bilibili.com/video/tv-presentation-1.html', 'http://www.bilibili.com/video/av7837549', 'http://h.bilibili.com/dy191089', 'http://ro.biligame.com/yuyue/', 'https://www.bilibili.com/html/copyright.html', 'http://www.bilibili.com/mango', 'http://bangumi.bilibili.com/anime/5521/play', 'http://space.bilibili.com/35954237', 'http://bnj.biligame.com/', 'http://h.bilibili.com/dy192025', 'http://h.bilibili.com/dy189147', 'http://www.bilibili.com/video/av7799459', 'http://h.bilibili.com/dy182219', 'http://h.bilibili.com/dy190926', 'http://www.bilibili.com/video/av7906733/', 'http://h.bilibili.com/dy185198', 'http://h.bilibili.com/upload.html', 'http://h.bilibili.com/dy192080', 'http://www.bilibili.com/video/music-collection-1.html', 'http://live.bilibili.com/news/index/1/2', 
'http://bh3.biligame.com/djyxds/', 'http://h.bilibili.com/dy192082', 'http://www.bilibili.com/video/tech-future-other-1.html', 'http://h.bilibili.com/dy192090', 'http://www.bilibili.com/video/av7906650/', 'http://www.bilibili.com/video/av7906609/', 'http://h.bilibili.com/dy189021', 'https://pay.bilibili.com/bk_balance.html', 'http://zwfz.biligame.com/', 'http://h.bilibili.com/dy192095', 'http://search.bilibili.com/drawyoo?keyword=%E6%B0%B4%E5%BD%A9', 'http://www.bilibili.com/blackboard/live-stage.html', 'http://www.bilibili.com/newlist.html?typeid=71', 'http://www.bilibili.com/video/douga.html', 'http://www.bilibili.com/video/movie_west_1.html', 'http://dhh.biligame.com/', 'https://account.bilibili.com/register/licence', 'http://www.bilibili.com/newlist.html?page=11', 'http://space.bilibili.com/367474', 'https://account.bilibili.com/site/face.html', 'http://www.bilibili.com/video/av7906648/', 'http://bangumi.bilibili.com/anime/5789', 'http://bml.bilibili.com/index2016/', 'http://h.bilibili.com/dy184402', 'http://www.bilibili.com/video/music-oped-1.html', 'http://h.bilibili.com/wallpaper?action=list', 'http://lxjjx.biligame.com/', 'http://live.bilibili.com/274', 'http://space.bilibili.com/39597463', 'http://h.bilibili.com/member?mod=space&uid=1328272&act=p_index', 'http://www.bilibili.com/video/kichiku.html', 'http://www.bilibili.com/newlist.html?page=3', 'http://bbs.biligame.com/forum.php?mod=forumdisplay&fid=127', 'http://big.bilibili.com/site/points.html', 'http://h.bilibili.com/member?mod=space&uid=4298083&act=p_index', 'http://h.bilibili.com/dy192092', 'http://www.bilibili.com/video/ent-life-1.html', 'https://bml.bilibili.com/index2016/?nav', 'http://h.bilibili.com/dy191978', 'http://h.bilibili.com/admin', 'http://www.bilibili.com/video/av7906808/', 'http://www.bilibili.com/html/join.html', 'https://yoo.bilibili.com/index.html', 'https://pay.bilibili.com/trade_list_out.html', 'http://www.bilibili.com/video/fashion-makeup-fitness-1.html', 
'http://bangumi.bilibili.com/33/', 'http://www.bilibili.com/video/av7906621/', 'http://www.bilibili.com/video/av7764705', 'http://link.acg.tv/home.php?mod=space&username=fanorange', 'http://www.bilibili.com/video/av7839233', 'http://bangumi.bilibili.com/anime/5778', 'http://bangumi.bilibili.com/anime/5830', 'https://pay.bilibili.com/withdraw_out.html', 'http://h.bilibili.com/member?mod=space&act=p_attention', 'http://www.shjbzx.cn/jbpt/n57/n63/u1ai1811.html', 'http://h.bilibili.com/dy175946', 'http://bangumi.bilibili.com/anime/3287', 'http://e.weibo.com/bilibiliweb?ref=http%3A%2F%2Fwww.bilibili.com%2F', 'http://comic.sina.com.cn/', 'http://www.bilibili.com/newlist.html?typeid=95', 'http://h.bilibili.com/dy191341', 'http://bangumi.bilibili.com/anime/5793', 'http://activity.bilibili.com/activity/67', 'http://live.bilibili.com', 'http://link.acg.tv/forum.php?mod=forumdisplay&fid=41', 'http://www.bilibili.com/video/av7906689/', 'https://account.bilibili.com/answer/addq', 'http://link.acg.tv/member.php?mod=register', 'https://activity.bilibili.com/', 'http://zb.bilibili.com', 'http://www.shjbzx.cn/jbpt/n57/n63/u1ai1085.html', 'http://h.bilibili.com/dy192088', 'http://teos2.biligame.com/htspds/', 'http://www.bilibili.com/video/av7906601/', 'http://www.bilibili.com/video/ent-handmake-1.html', 'http://www.bilibili.com/video/movie_japan_1.html', 'https://account.bilibili.com/register/phone', 'http://h.bilibili.com/dy191196', 'http://h.bilibili.com/member?mod=space&uid=32596277&act=p_index', 'http://h.bilibili.com/dy188667', 'https://www.bilibili.com/html/help.html', 'http://bangumi.bilibili.com/anime/5861/play', 'http://h.bilibili.com/dy192035', 'http://www.bilibili.com/video/av7751211', 'http://www.bilibili.com/video/av3245397/', 'http://touken.biligame.com', 'http://h.bilibili.com/browse', 'http://www.bilibili.com/video/game.html', 'http://www.bilibili.com/video/av7906656/', 'http://www.bilibili.com/video/av7906592/', 'http://h.bilibili.com/dy187046', 
'http://www.bilibili.com/video/av7849016', 'https://account.bilibili.com/site', 'http://www.bilibili.com/newlist.html?page=1839', 'http://100p.biligame.com/sd/', 'http://bangumi.bilibili.com/anime/5510/play', 'http://www.bilibili.com/newlist.html?typeid=157', 'http://h.bilibili.com/dy191350', 'http://h.bilibili.com/member?mod=space&uid=2774688&act=p_index', 'http://search.bilibili.com/drawyoo?keyword=%E9%93%85%E7%AC%94', 'https://account.bilibili.com/', 'http://sglms.biligame.com/', 'http://live.bilibili.com/i', 'http://www.bilibili.com/video/music-original-1.html', 'http://bangumi.bilibili.com/anime/5787', 'http://www.bilibili.com/video/av7906658/', 'http://h.bilibili.com/member?mod=space&uid=17147129&act=p_index', 'http://www.bilibili.com/video/av7893817', 'http://h.bilibili.com/member?mod=space&uid=772288&act=p_index', 'http://game.bilibili.com/record.html', 'http://www.bilibili.com/video/av7906740/', 'http://bangumi.bilibili.com/anime/5861', 'http://www.bilibili.com/video/av7905271', 'http://www.bilibili.com/topic/1499.html', 'http://bangumi.bilibili.com/anime/5784/play', 'http://h.bilibili.com/dy191334', 'http://h.bilibili.com/dy191204', 'http://bangumi.bilibili.com/anime/5800', 'http://bangumi.bilibili.com/anime/3532', 'http://link.acg.tv/forum.php?mod=redirect&tid=979&goto=lastpost', 'http://tank.biligame.com/', 'https://pay.bilibili.com/', 'http://link.acg.tv/forum.php?mod=misc&action=showdarkroom', 'http://live.bilibili.com/ent-life', 'http://www.bilibili.com/video/av7834435', 'http://www.bilibili.com/html/activity-TGS2016.html', 'http://www.bilibili.com/video/douga-voice-1.html', 'http://www.bilibili.com/video/dance-demo-1.html', 'http://zwfz.biligame.com/yuyue/', 'http://live.bilibili.com/news/detail/329', 'http://bml.bilibili.com/sp2016/', 'http://www.bilibili.com/video/av7861652', 'http://h.bilibili.com/dy190712', 'http://link.acg.tv/forum.php?mod=viewthread&tid=2&extra=page%3D1', 'http://h.bilibili.com/member?mod=space&uid=4873082&act=p_index', 
'https://www.bilibili.com/event', 'http://eva.biligame.com/yuyue/', 'http://activity.bilibili.com/activity/73', 'http://www.bilibili.com/newlist.html?page=6', 'http://www.bilibili.com/', 'http://www.bilibili.com/newlist.html?typeid=76', 'http://bangumi.bilibili.com/anime/5784', 'http://space.bilibili.com/17373244', 'http://h.bilibili.com/dy190613', 'http://www.comsenz.com', 'https://www.bilibili.com/html/aboutUs.html', 'http://www.bilibili.com/video/game-mugen-1.html', 'https://account.bilibili.com/site/record', 'http://www.bilibili.com/video/movie-presentation-1.html', 'http://live.bilibili.com/otaku', 'http://www.bilibili.com/video/music-game-1.html', 'http://touken.biligame.com/yuyue/', 'http://link.acg.tv/home.php?mod=space&username=%E5%8C%97%E8%BE%B0%E5%B7%9D', 'http://h.bilibili.com/member?mod=space&uid=1917685&act=p_index', 'http://www.bilibili.com/video/av7768484', 'http://jxsj.biligame.com/', 'http://space.bilibili.com/15164973', 'http://www.bilibili.com/video/fashion.html', 'http://h.bilibili.com/dy191927', 'http://bangumi.bilibili.com/anime/5830/play', 'http://www.bilibili.com/html/contact.html', 'http://h.bilibili.com/member?mod=space&uid=699438&act=p_index', 'http://www.bilibili.com/video/av7906594/', 'http://www.bilibili.com/video/av7906587/', 'http://live.bilibili.com/news/index/1/3', 'http://anime.kankan.com/', 'http://space.bilibili.com/1761432', 'http://www.bilibili.com/video/soap-three-1.html', 'http://h.bilibili.com/dy191251', 'http://mfxy.biligame.com/', 'http://search.bilibili.com/drawyoo?keyword=PS', 'http://link.acg.tv/', 'http://www.bilibili.com/video/music-coordinate-1.html', 'http://h.bilibili.com/dy177006', 'http://news.bilibili.com', 'http://www.bilibili.com/video/music.html', 'http://www.bilibili.com/video/av7867707', 'http://www.bilibili.com/video/av7877938', 'http://h.bilibili.com/dy192086', 'http://live.bilibili.com/e-sports', 'http://link.acg.tv', 'http://www.bilibili.com/video/av7884418', 
'http://www.bilibili.com/video/av3269050/', 'http://bangumi.bilibili.com/22/', 'http://h.bilibili.com/dy192089', 'http://www.bilibili.com/search?keyword=%E9%AB%98%E7%BA%A7%E5%BC%B9%E5%B9%95+%E6%95%99%E7%A8%8B&orderby=&formsubmit=', 'http://news.bilibili.com/news/36', 'http://h.bilibili.com/dy191568', 'http://www.bilibili.com/video/game-video-1.html', 'http://bh3.biligame.com/', 'http://www.bilibili.com/video/av7906597/', 'http://gf.biligame.com/', 'http://space.bilibili.com/846318', 'http://game.bilibili.com/jz/jiazhang.htm', 'http://www.bilibili.com/video/av7906626/', 'http://www.bilibili.com/video/av7852873', 'http://live.bilibili.com/help', 'http://weibo.com/p/1005053746119300', 'http://link.acg.tv/home.php?mod=space&username=%E5%83%B5%E5%B0%B8%E6%B3%A1%E6%B3%A1', 'http://space.bilibili.com/14452610', 'http://www.shjbzx.cn/jbpt/n57/n68/index.html', 'https://bilibili2233.taobao.com/', 'http://acg.tv/u1de', 'http://h.bilibili.com/dy189325', 'http://discuz.qq.com/service/security', 'http://bangumi.bilibili.com/anime/5794', 'http://acg.tv/u1fv', 'http://www.bilibili.com/blackboard/activity_17bnj_pray.html', 'http://h.bilibili.com/dy189208', 'http://live.bilibili.com/news/detail/37', 'http://space.bilibili.com/11443550', 'http://h.bilibili.com/member?mod=space&act=p_favourite', 'http://h.bilibili.com/dy191361', 'https://account.bilibili.com/site/nameplate.html', 'http://bangumi.bilibili.com/anime/5776', 'http://bangumi.bilibili.com/anime/5774', 'http://bangumi.bilibili.com/anime/5549', 'http://www.bilibili.com/event?page=4', 'https://www.bilibili.com/account/dynamic', 'http://game.bilibili.com/gamelist.html', 'http://space.bilibili.com/register/phone', 'http://game.bilibili.com/', 'http://h.bilibili.com/dy191647', 'http://big.bilibili.com/site/get-big-pc.html', 'http://h.bilibili.com/dy183785', 'http://www.bilibili.com/blackboard/big.html', 'http://h.bilibili.com/dy192075', 'http://h.bilibili.com/dy192046', 'http://www.bilibili.com/video/av7822800', 
'http://www.bilibili.com/video/av7519283', 'http://hxzj.biligame.com/', 'http://activity.bilibili.com/activity/72', 'http://tank.biligame.com/remix/', 'http://h.bilibili.com/member?mod=space&uid=201448&act=p_index', 'https://shop163539432.taobao.com', 'http://h.bilibili.com/dy181931', 'http://live.bilibili.com/online', 'http://h.bilibili.com/rank/31', 'http://www.bilibili.com/video/av7906649/', 'http://h.bilibili.com/member?mod=space&uid=5030761&act=p_index', 'http://h.bilibili.com/member?mod=space&uid=7355851&act=p_index', 'http://app.bilibili.com/', 'http://cq.biligame.com/', 'http://live.bilibili.com/draw', 'http://yoo.bilibili.com/', 'http://www.bilibili.com/video/av7906612/', 'http://www.bilibili.com/newlist.html?page=9', 'http://live.bilibili.com/260', 'https://secure.bilibili.com/login?sns=weibo', 'http://search.bilibili.com/drawyoo?keyword=%E6%B8%A3', 'http://h.bilibili.com/dy175013', 'http://www.bilibili.com/video/douga-mmd-1.html', 'http://www.bilibili.com/video/av7906663/', 'http://www.bilibili.com/video/kichiku-course-1.html', 'http://space.bilibili.com', 'http://www.bilibili.com/newlist.html?typeid=30', 'http://link.acg.tv/forum.php?mod=forumdisplay&fid=37', 'http://bbs.biligame.com/forum.php?mod=forumdisplay&fid=44', 'http://h.bilibili.com/dy189481', 'http://www.bilibili.com/newlist.html?page=5', 'http://www.bilibili.com/video/av7906619/', 'http://live.bilibili.com/liveHime/', 'http://www.bilibili.com/video/douga-else-1.html', 'http://bangumi.bilibili.com/anime/3462', 'http://h.bilibili.com/dy183789', 'http://h.bilibili.com/dy191685', 'http://app.bilibili.com', 'http://www.bilibili.com/newlist.html?typeid=163', 'https://planet2017.bilibili.com/', 'http://100p.biligame.com/', 'http://live.bilibili.com/news/detail/327', 'http://www.biligame.com', 'http://link.acg.tv/forum.php?mod=forumdisplay&fid=38'}\n"
     ]
    }
   ],
   "source": [
    "# Start the crawl from a single seed page. crawler is not created in this\n",
    "# cell; it is expected to exist from an earlier cell (TODO confirm on a fresh kernel)\n",
    "page_list = ['http://www.bilibili.com']\n",
    "crawler.crawl(page_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(2, 1),\n",
       " (760, 3),\n",
       " (1119, 9),\n",
       " (1393, 12),\n",
       " (1773, 13),\n",
       " (8365, 18),\n",
       " (8785, 20),\n",
       " (9325, 23),\n",
       " (9754, 27),\n",
       " (9848, 28),\n",
       " (10387, 37),\n",
       " (10392, 38),\n",
       " (12809, 41),\n",
       " (13589, 43),\n",
       " (13875, 46),\n",
       " (14239, 48),\n",
       " (14278, 49),\n",
       " (16346, 53),\n",
       " (16668, 54),\n",
       " (18110, 74),\n",
       " (18566, 81),\n",
       " (24459, 82),\n",
       " (24817, 88),\n",
       " (25579, 92),\n",
       " (27010, 100),\n",
       " (28498, 107),\n",
       " (29067, 111),\n",
       " (29787, 121),\n",
       " (29881, 123)]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show (rowid, urlid) for every wordlocation entry of the word with id 2\n",
    "list(crawler.con.execute('select rowid,urlid from wordlocation where wordid=2'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Querying "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Queries the index built by the crawler and ranks the matching pages.\n",
    "# Tables read: wordlist, wordlocation, urllist, link, linkwords, pagerank.\n",
    "class Searcher:\n",
    "    # Open the SQLite database that holds the index\n",
    "    def __init__(self, db_name):\n",
    "        self.con = sqlite3.connect(db_name)\n",
    "\n",
    "    def __del__(self):\n",
    "        # self.con is missing when sqlite3.connect() raised in __init__\n",
    "        con = getattr(self, 'con', None)\n",
    "        if con is not None:\n",
    "            con.close()\n",
    "\n",
    "    # Find every combination of locations at which all known query words\n",
    "    # occur on the same page.\n",
    "    # Returns (rows, word_ids). Each row is (urlid, location of word 0,\n",
    "    # location of word 1, ...); word_ids holds the wordlist rowids of the\n",
    "    # query words found in the index. Unknown words are ignored, and\n",
    "    # ([], []) is returned when no query word is known.\n",
    "    def get_match_rows(self, q):\n",
    "        word_ids = []\n",
    "        # split() without arguments also drops the empty strings that\n",
    "        # repeated spaces would produce\n",
    "        for word in q.split():\n",
    "            # Bind the user-supplied word instead of formatting it into the SQL\n",
    "            word_row = self.con.execute(\n",
    "                'select rowid from wordlist where word = ?', (word,)).fetchone()\n",
    "            if word_row is not None:\n",
    "                word_ids.append(word_row[0])\n",
    "\n",
    "        # Nothing to join on: do not build a query with empty from/where parts\n",
    "        if not word_ids:\n",
    "            return [], word_ids\n",
    "\n",
    "        # One wordlocation alias per word, all joined on the same urlid\n",
    "        field_list = ['w0.urlid']\n",
    "        table_list = []\n",
    "        clause_list = []\n",
    "        for table_number, word_id in enumerate(word_ids):\n",
    "            if table_number > 0:\n",
    "                clause_list.append('w%d.urlid=w%d.urlid' % (table_number - 1, table_number))\n",
    "            field_list.append('w%d.location' % table_number)\n",
    "            table_list.append('wordlocation w%d' % table_number)\n",
    "            # word_id is an integer rowid read from the database\n",
    "            clause_list.append('w%d.wordid=%d' % (table_number, word_id))\n",
    "\n",
    "        # Create the query from the separate parts\n",
    "        full_query = 'select %s from %s where %s' % (\n",
    "            ','.join(field_list), ','.join(table_list), ' and '.join(clause_list))\n",
    "        print(full_query)\n",
    "        rows = self.con.execute(full_query).fetchall()\n",
    "\n",
    "        return rows, word_ids\n",
    "\n",
    "    # Combine the scoring functions into one {urlid: total score} dict.\n",
    "    # word_ids is accepted for interface compatibility; none of the scorers\n",
    "    # weighted below reads it.\n",
    "    def get_scored_list(self, rows, word_ids):\n",
    "        total_scores = {row[0]: 0 for row in rows}\n",
    "        # No matches: there is nothing to score\n",
    "        if not total_scores:\n",
    "            return total_scores\n",
    "\n",
    "        # (weight, scores) pairs; a weight of 0.0 removes a scorer's contribution\n",
    "        weights = [(0.0, self.frequency_score(rows)),\n",
    "                   (1.0, self.location_score(rows)),\n",
    "                   (0.0, self.distance_score(rows)),\n",
    "                   (0.0, self.inbound_link_score(rows)),\n",
    "                   (1.0, self.page_rank_score(rows))]\n",
    "\n",
    "        for (weight, scores) in weights:\n",
    "            for url in total_scores:\n",
    "                total_scores[url] += weight * scores[url]\n",
    "\n",
    "        return total_scores\n",
    "\n",
    "    # Return the url stored in urllist under the given rowid\n",
    "    def get_url_name(self, id):\n",
    "        return self.con.execute('select url from urllist where rowid = ?', (id,)).fetchone()[0]\n",
    "\n",
    "    # Run the query q and print one 'score<TAB>url' line per result, best\n",
    "    # score first. limit caps the number of printed results.\n",
    "    def query(self, q, limit=10):\n",
    "        rows, word_ids = self.get_match_rows(q)\n",
    "        scores = self.get_scored_list(rows, word_ids)\n",
    "        ranked_scores = sorted(((score, url_id) for (url_id, score) in scores.items()), reverse=True)\n",
    "        for (score, url_id) in ranked_scores[:limit]:\n",
    "            print('%f\\t%s' % (score, self.get_url_name(url_id)))\n",
    "\n",
    "    # Scale the values of a {urlid: score} dict into the range 0..1, where 1\n",
    "    # is the best page. With small_is_better the smallest raw value is the\n",
    "    # best one. An empty dict is returned unchanged.\n",
    "    def normalize_scores(self, scores, small_is_better=0):\n",
    "        v_small = 0.00001 # Avoid division by zero errors\n",
    "        if not scores:\n",
    "            return {}\n",
    "        if small_is_better:\n",
    "            # Clamp the minimum as well, so that a best raw value of 0 still\n",
    "            # maps to 1.0 instead of 0.0\n",
    "            min_score = max(v_small, min(scores.values()))\n",
    "            return {u: float(min_score) / max(v_small, l) for (u, l) in scores.items()}\n",
    "        max_score = max(scores.values())\n",
    "        if max_score == 0:\n",
    "            max_score = v_small\n",
    "        return {u: float(c) / max_score for (u, c) in scores.items()}\n",
    "\n",
    "    # Score pages by the number of matching rows they have\n",
    "    def frequency_score(self, rows):\n",
    "        counts = {}\n",
    "        for row in rows:\n",
    "            counts[row[0]] = counts.get(row[0], 0) + 1\n",
    "        return self.normalize_scores(counts)\n",
    "\n",
    "    # Score pages by how early the query words appear: the smallest sum of\n",
    "    # word locations found for a page wins\n",
    "    def location_score(self, rows):\n",
    "        locations = {}\n",
    "        for row in rows:\n",
    "            loc = sum(row[1:])\n",
    "            # Keep the minimum per page; the first row seen for a page seeds it\n",
    "            if row[0] not in locations or loc < locations[row[0]]:\n",
    "                locations[row[0]] = loc\n",
    "        return self.normalize_scores(locations, small_is_better=1)\n",
    "\n",
    "    # Score pages by how close together the query words appear\n",
    "    def distance_score(self, rows):\n",
    "        if not rows:\n",
    "            return {}\n",
    "        # A row is (urlid, location, ...): with a single word there is no\n",
    "        # distance to measure, so everyone wins!\n",
    "        if len(rows[0]) <= 2:\n",
    "            return {row[0]: 1.0 for row in rows}\n",
    "\n",
    "        min_distance = {}\n",
    "        for row in rows:\n",
    "            dist = sum(abs(row[i] - row[i - 1]) for i in range(2, len(row)))\n",
    "            if row[0] not in min_distance or dist < min_distance[row[0]]:\n",
    "                min_distance[row[0]] = dist\n",
    "        return self.normalize_scores(min_distance, small_is_better=1)\n",
    "\n",
    "    # Score pages by the number of rows in link that point at them\n",
    "    def inbound_link_score(self, rows):\n",
    "        unique_urls = {row[0] for row in rows}\n",
    "        inbound_count = {\n",
    "            u: self.con.execute('select count(*) from link where toid=?', (u,)).fetchone()[0]\n",
    "            for u in unique_urls}\n",
    "        return self.normalize_scores(inbound_count)\n",
    "\n",
    "    # Score pages by the value stored for them in the pagerank table\n",
    "    def page_rank_score(self, rows):\n",
    "        pageranks = {}\n",
    "        for url_id in {row[0] for row in rows}:\n",
    "            rank_row = self.con.execute(\n",
    "                'select score from pagerank where urlid=?', (url_id,)).fetchone()\n",
    "            # A page without a pagerank row contributes nothing instead of raising\n",
    "            pageranks[url_id] = rank_row[0] if rank_row is not None else 0.0\n",
    "        # normalize_scores also guards against an all-zero maximum\n",
    "        return self.normalize_scores(pageranks)\n",
    "\n",
    "    # Score pages by the pagerank of the pages that link to them with one of\n",
    "    # the query words in the link text\n",
    "    def linktextscore(self, rows, wordids):\n",
    "        linkscores = {row[0]: 0 for row in rows}\n",
    "        for wordid in wordids:\n",
    "            cur = self.con.execute(\n",
    "                'select link.fromid,link.toid from linkwords,link '\n",
    "                'where wordid=? and linkwords.linkid=link.rowid', (wordid,))\n",
    "            # Read the links inside the wordid loop so that every word counts,\n",
    "            # not only the last one\n",
    "            for (fromid, toid) in cur.fetchall():\n",
    "                if toid in linkscores:\n",
    "                    rank_row = self.con.execute(\n",
    "                        'select score from pagerank where urlid=?', (fromid,)).fetchone()\n",
    "                    if rank_row is not None:\n",
    "                        linkscores[toid] += rank_row[0]\n",
    "        return self.normalize_scores(linkscores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "select w0.urlid,w0.location,w1.location from wordlocation w0,wordlocation w1 where w0.wordid=5 and w0.urlid=w1.urlid and w1.wordid=24\n",
      "1.000000\thttp://message.bilibili.com\n",
      "0.990608\thttp://app.bilibili.com/\n",
      "0.983514\thttp://www.bilibili.com/html/cele.html\n",
      "0.980617\thttp://h.bilibili.com/wallpaper?action=list\n",
      "0.974274\thttp://www.bilibili.com/html/contact.html\n",
      "0.972971\thttp://www.bilibili.com/html/friends-links.html\n",
      "0.971005\thttp://www.bilibili.com/html/aboutUs.html\n",
      "0.952633\thttps://account.bilibili.com/site/home\n",
      "0.950351\thttps://pay.bilibili.com/\n",
      "0.948912\thttps://www.bilibili.com/register\n"
     ]
    }
   ],
   "source": [
    "# Open the search index database and run a sample two-term query;\n",
    "# the ranked URLs it prints are shown in this cell's output.\n",
    "e = Searcher('searchindex.db')\n",
    "e.query('bilibili 直播')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## PageRank "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iteration 0\n",
      "Iteration 1\n",
      "Iteration 2\n",
      "Iteration 3\n",
      "Iteration 4\n",
      "Iteration 5\n",
      "Iteration 6\n",
      "Iteration 7\n",
      "Iteration 8\n",
      "Iteration 9\n",
      "Iteration 10\n",
      "Iteration 11\n",
      "Iteration 12\n",
      "Iteration 13\n",
      "Iteration 14\n",
      "Iteration 15\n",
      "Iteration 16\n",
      "Iteration 17\n",
      "Iteration 18\n",
      "Iteration 19\n"
     ]
    }
   ],
   "source": [
    "# Run the crawler's PageRank calculation (defined in an earlier cell);\n",
    "# the recorded run logged iterations 0 through 19.\n",
    "crawler.calculate_pagerank()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('https://passport.bilibili.com/login', 0.384453),\n",
       " ('http://www.bilibili.com/html/join-list.html', 0.362684),\n",
       " ('https://account.bilibili.com/login?act=exit', 0.275827),\n",
       " ('https://account.bilibili.com/answer/addq', 0.264513),\n",
       " ('http://live.bilibili.com/i', 0.262633),\n",
       " ('http://message.bilibili.com', 0.250216),\n",
       " ('http://www.bilibili.com/html/join.html', 0.250216),\n",
       " ('http://app.bilibili.com/', 0.247866),\n",
       " ('http://www.bilibili.com/video/av120040/', 0.247764),\n",
       " ('http://www.bilibili.com/html/cele.html', 0.246091)]"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List the ten highest-scoring pages: join pagerank to urllist on the url id\n",
    "# and sort by score, descending.\n",
    "[row for row in crawler.con.execute('select urllist.url,pagerank.score from pagerank,urllist \\\n",
    "                                    where urllist.rowid=pagerank.urlid order by pagerank.score desc')][0:10]"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
